Fix and uncomment non-MMX version.

author Owen Taylor <otaylor@redhat.com>

Fri, 4 Jan 2002 00:34:06 +0000 (00:34 +0000)

committer Owen Taylor <otaylor@src.gnome.org>

Fri, 4 Jan 2002 00:34:06 +0000 (00:34 +0000)
author Owen Taylor <otaylor@redhat.com>
Fri, 4 Jan 2002 00:34:06 +0000 (00:34 +0000)
committer Owen Taylor <otaylor@src.gnome.org>
Fri, 4 Jan 2002 00:34:06 +0000 (00:34 +0000)
diff --git a/gdk-pixbuf/ChangeLog b/gdk-pixbuf/ChangeLog

index 9a8e370a394dc2176487072af16c15389cf7c862..390efd9fdc305c2a5a193892fc8dbcc5c27bf1d2 100644 (file)
--- a/gdk-pixbuf/ChangeLog
+++ b/gdk-pixbuf/ChangeLog
@@ -1,3 +1,17 @@
+Thu Jan  3 19:31:58 2002  Owen Taylor  <otaylor@redhat.com>
+
+       * pixops/pixops.c (scale_line_22_33): Fix and uncomment
+       non-MMX version.
+
+       * pixops/pixops.c (pixops_composite_nearest): Remove a
+       division.
+
+       * pixops/pixops.c (pixops_composite): Add some docs
+       about the parameters.
+
+       * pixops/README: Add notes about the correct algorithms
+       for alpha compositing and how to implement them quickly.
+
  2001-12-29  Tor Lillqvist  <tml@iki.fi>
  
         * Makefile.am (EXTRA_DIST): Add makefile.msc.
diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README

index 957a0b3cbe7cfd4863837d998168d007b5cb7f4a..c79e0e2fdcc54fc019c8295eb32b9c1a6365b3a5 100644 (file)
--- a/gdk-pixbuf/pixops/README
+++ b/gdk-pixbuf/pixops/README
@@ -33,6 +33,61 @@ for a number of the most common special cases:
   compositing from RGBA to RGBx
   compositing against a color from RGBA and storing in a RGBx buffer
  
+Alpha compositing 8 bit RaGaBaAa onto RbGbBb is defined in terms of
+rounding the exact result (real values in [0,1]):
+
+ cc = ca * aa + (1 - aa) * cb
+
+ Cc = ROUND [255. * (Ca/255. * Aa/255. + (1 - Aa/255.) * Cb/255.)]
+
+We can compute this exactly using only integer arithmetic.
+
+ROUND(i / 255.) can be computed exactly for i in [0,255*255] as:
+
+ t = i + 0x80; result = (t + (t >> 8)) >> 8;  [ call this as To8(i) ]
+
+So, 
+  
+ t = Ca * Aa + (255 - Aa) * Cb + 0x80;
+ Cc = (t + (t >> 8)) >> 8;
+
+Alpha compositing 8 bit RaGaBaAa onto RbGbBbAb is a little harder, for
+non-premultiplied alpha. The premultiplied result is simple:
+
+ ac = aa + (1 - aa) * ab
+ cc = ca + (1 - aa) * cb
+
+Which can be computed in integer terms as:
+
+ Cc = Ca + To8 ((255 - Aa) * Cb)
+ Ac = Aa + To8 ((255 - Aa) * Ab)
+
+For non-premultiplied alpha, we need to divide the color components by
+the alpha:
+
+       +- (ca * aa + (1 - aa) * ab * cb) / ac; aa != 0
+  cc = |
+       +- cb; aa == 0
+
+To calculate this in integers, we note the alternate form:
+
+ cc = cb + aa * (ca - cb) / ac
+
+[ 'cc = ca + (ac - aa) * (cb - ca) / ac' can also be useful numerically,
+  but isn't important here ]
+
+We can express this as integers as:
+
+ Ac_tmp = Aa * 255 + (255 - Aa) * Ab;
+ 
+      +- Cb + (255 * Aa * (Ca - Cb) + Ac_tmp / 2) / Ac_tmp ; Ca > Cb
+ Cc = | 
+      +- Cb - (255 * Aa * (Cb - Ca) + Ac_tmp / 2) / Ac_tmp ; Ca <= Cb
+
+Or, playing bit tricks to avoid the conditional
+
+ Cc = Cb + (255 * Aa * (Ca - Cb) + (((Ca - Cb) >> 8) ^ (Ac_tmp / 2)) ) / Ac_tmp
+
  TODO
  ====
  
@@ -57,13 +112,13 @@ TODO
    the _nearest() variants do it right, most of the other code 
    is wrong to some degree or another.
  
-  For instance, in composite line, we have:
+  For instance, in composite_line_22_4a4(), we have:
  
      dest[0] = ((0xff0000 - a) * dest[0] + r) >> 24;
  
-   if a is 0, then we have:
+   if a is 0 (implies r == 0), then we have:
  
-    (0xff0000 * dest[0] + r) >> 24
+    (0xff0000 * dest[0]) >> 24
  
     which gives results which are 1 too low:
  
diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c

index c9606107fb405ce0cbab97bdbf5e2151b3ea19e0..ef5afd74759fd117da74ecbf7ef1e337eeefe8fd 100644 (file)
--- a/gdk-pixbuf/pixops/pixops.c
+++ b/gdk-pixbuf/pixops/pixops.c
@@ -202,10 +202,14 @@ pixops_composite_nearest (guchar        *dest_buf,
                else
                  {
                    unsigned int a1 = 0xff - a0;
-                  
-                  dest[0] = (a0 * p[0] + a1 * dest[0]) / 0xff;
-                  dest[1] = (a0 * p[1] + a1 * dest[1]) / 0xff;
-                  dest[2] = (a0 * p[2] + a1 * dest[2]) / 0xff;
+                 unsigned int tmp;
+
+                 tmp = a0 * p[0] + a1 * dest[0] + 0x80;
+                  dest[0] = (tmp + (tmp >> 8)) >> 8;
+                 tmp = a0 * p[1] + a1 * dest[1] + 0x80;
+                  dest[1] = (tmp + (tmp >> 8)) >> 8;
+                 tmp = a0 * p[2] + a1 * dest[2] + 0x80;
+                  dest[2] = (tmp + (tmp >> 8)) >> 8;
                  }
                break;
              }
@@ -388,7 +392,7 @@ composite_line (int *weights, int n_x, int n_y,
        int *pixel_weights;
        
        pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y;
-
+      
        for (i=0; i<n_y; i++)
         {
           guchar *q = src[i] + x_scaled * src_channels;
@@ -837,10 +841,9 @@ scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y,
  }
  #endif /* USE_MMX */
  
-#ifdef SCALE_LINE_22_33_USED /* This dead code would need changes if we wanted to use it */
  static guchar *
  scale_line_22_33 (int *weights, int n_x, int n_y,
-                 guchar *dest, guchar *dest_end, int dest_channels, int dest_has_alpha,
+                 guchar *dest, int dest_x, guchar *dest_end, int dest_channels, int dest_has_alpha,
                   guchar **src, int src_channels, gboolean src_has_alpha,
                   int x_init, int x_step, int src_width,
                   int check_size, guint32 color1, guint32 color2)
@@ -860,8 +863,8 @@ scale_line_22_33 (int *weights, int n_x, int n_y,
        q0 = src0 + x_scaled * 3;
        q1 = src1 + x_scaled * 3;
        
-      pixel_weights = (int *)((char *)weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS - 4)) & (SUBSAMPLE_MASK << 4)));
-      
+      pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * 4;
+
        w1 = pixel_weights[0];
        w2 = pixel_weights[1];
        w3 = pixel_weights[2];
@@ -883,9 +886,9 @@ scale_line_22_33 (int *weights, int n_x, int n_y,
        g += w4 * q1[5];
        b += w4 * q1[6];
  
-      dest[0] = r >> 16;
-      dest[1] = g >> 16;
-      dest[2] = b >> 16;
+      dest[0] = (r + 0x8000) >> 16;
+      dest[1] = (g + 0x8000) >> 16;
+      dest[2] = (b + 0x8000) >> 16;
        
        dest += 3;
        
@@ -894,7 +897,6 @@ scale_line_22_33 (int *weights, int n_x, int n_y,
    
    return dest;
  }
-#endif /* SCALE_LINE_22_33_USED */
  
  static void
  process_pixel (int *weights, int n_x, int n_y,
@@ -1227,7 +1229,7 @@ bilinear_make_fast_weights (PixopsFilter *filter, double x_scale, double y_scale
  
         for (i = 0; i < n_y; i++)
           for (j = 0; j < n_x; j++)
-           *(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha;
+           *(pixel_weights + n_x * i + j) = 65536 * x_weights[j] * x_scale * y_weights[i] * y_scale * overall_alpha + 0.5;
        }
  
    g_free (x_weights);
@@ -1412,6 +1414,30 @@ pixops_composite_color (guchar         *dest_buf,
    g_free (filter.weights);
  }
  
+/**
+ * pixops_composite:
+ * @dest_buf: pointer to location to store result
+ * @render_x0: x0 of region of scaled source to store into @dest_buf
+ * @render_y0: y0 of region of scaled source to store into @dest_buf
+ * @render_x1: x1 of region of scaled source to store into @dest_buf
+ * @render_y1: y1 of region of scaled source to store into @dest_buf
+ * @dest_rowstride: rowstride of @dest_buf
+ * @dest_channels: number of channels in @dest_buf
+ * @dest_has_alpha: whether @dest_buf has alpha
+ * @src_buf: pointer to source pixels
+ * @src_width: width of source (used for clipping)
+ * @src_height: height of source (used for clipping)
+ * @src_rowstride: rowstride of source
+ * @src_channels: number of channels in @src_buf
+ * @src_has_alpha: whether @src_buf has alpha
+ * @scale_x: amount to scale source by in X direction
+ * @scale_y: amount to scale source by in Y direction
+ * @interp_type: type of interpolation
+ * @overall_alpha: overall alpha factor to multiply source by
+ * 
+ * Scale source buffer by scale_x / scale_y, then composite a given rectangle
+ * of the result into the destination buffer.
+ **/
  void
  pixops_composite (guchar        *dest_buf,
                   int            render_x0,
@@ -1550,12 +1576,16 @@ pixops_scale (guchar        *dest_buf,
        break;
      }
  
+  if (filter.n_x == 2 && filter.n_y == 2 && dest_channels == 3 && src_channels == 3)
+    {
  #ifdef USE_MMX
-  if (filter.n_x == 2 && filter.n_y == 2 &&
-      found_mmx && dest_channels == 3 && src_channels == 3)
-    line_func = scale_line_22_33_mmx_stub;
+      if (found_mmx)
+       line_func = scale_line_22_33_mmx_stub;
+      else
+#endif
+       line_func = scale_line_22_33;
+    }
    else
-#endif    
      line_func = scale_line;
    
    pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S

index 40dec48d3ea4bbcbceb6c009e62438c3606c8a40..8259a9db3d643c79b3ed90656cac5061691df606 100644 (file)
--- a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
+++ b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
@@ -55,6 +55,12 @@ _pixops_scale_line_22_33_mmx:
         cmpl %esi,28(%ebp)
         je   .out
  
+/* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
+ * points we are interpolating between, as:
+ *
+ *  000000BB00GG00RR
+ */    
+       
  /* Load initial values into %mm1, %mm3 */
         leal (%edx,%edx,2),%edx  # Multiply by 3
  
@@ -82,11 +88,16 @@ _pixops_scale_line_22_33_mmx:
         jmp .newx
         .p2align 4,,7
  .loop:
-/* int x_index = (x & 0xf000) >> 12 */
+/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
+ *                                             16             4                  0xf            2     2
+ */
         movl %ebx,%eax
         andl $0xf000,%eax
         shrl $7,%eax
  
+/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and
+ * accumulate.
+ */
         movq (%edi,%eax),%mm4
         pmullw %mm0,%mm4
         movq 8(%edi,%eax),%mm5
@@ -99,7 +110,17 @@ _pixops_scale_line_22_33_mmx:
         paddw %mm6, %mm7
         paddw %mm5, %mm7
  
+/* %mm7        holds the accumulated sum. Compute (C + 0x80) / 256
+ */
+       pxor %mm4, %mm4
+       movl $8421504, %eax  # 0x00808080
+       movd %eax, %mm6  
+       punpcklbw %mm4, %mm6
+       paddw %mm6, %mm7
         psrlw $8, %mm7
+
+/* Pack into %eax and store result
+ */    
         packuswb %mm7, %mm7
         movd %mm7, %eax
         
@@ -113,7 +134,7 @@ _pixops_scale_line_22_33_mmx:
  
  /* x += x_step; */
         addl 24(%ebp),%ebx
-/* x_scale = x >> 16; */
+/* x_scaled = x >> 16; */
         movl %ebx,%edx
         sarl $16,%edx
  
@@ -131,7 +152,6 @@ _pixops_scale_line_22_33_mmx:
         leal (%edx,%edx,2),%edx  # Multiply by 3
  
         movl 16(%ebp),%edi
-       pxor %mm4, %mm4
         movzbl 2(%edi,%edx),%ecx
         shll $16,%ecx
         movzwl (%edi,%edx),%eax
author	Owen Taylor <otaylor@redhat.com>
	Fri, 4 Jan 2002 00:34:06 +0000 (00:34 +0000)
committer	Owen Taylor <otaylor@src.gnome.org>
	Fri, 4 Jan 2002 00:34:06 +0000 (00:34 +0000)
gdk-pixbuf/ChangeLog		patch \| blob \| history
gdk-pixbuf/pixops/README		patch \| blob \| history
gdk-pixbuf/pixops/pixops.c		patch \| blob \| history
gdk-pixbuf/pixops/scale_line_22_33_mmx.S		patch \| blob \| history